Load data

[1] 38 [1] TRUE [1] 454 38

 ATP      DEX     IFNy      IL4      LPS     R848     TNFa   unstim 
   5       10       79        8      128       42       47      135 

ununstim 42

DREAM model

# The variable to be tested must be a fixed effect
names(metadata_cultured) = tolower(names(metadata_cultured))

form <- ~ age + (1|donor_id) + stimulation + picard_pct_ribosomal_bases + region  + picard_pct_mrna_bases + age + picard_pct_percent_duplication + picard_pct_pf_reads_aligned 

# estimate weights using linear mixed model of dream
#vobjDream = voomWithDreamWeights( genes_counts_cultured, form, metadata_cultured )

# Fit the dream model on each gene
#fit = (dream( vobjDream, form, metadata_cultured ))

#res_age <- data.frame(topTable(fit, coef='age', number=nrow(genes_counts_cultured), sort.by = "p"), check.names = F)
#male age as coefficient
#save(res_age, file ="res_age.Rdata")

DE genes 15%

load("/Users/gijsjesnijders/Documents/MiGASti/Databases/res_age.Rdata")
load("~/Downloads/genes_counts_cultured.Rdata")
gencode_30 = read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
res_age <- tibble::rownames_to_column(res_age, "ensembl")
res_name_age <- merge(res_age, gencode_30, by = "ensembl")
sign_age <- subset(res_age, adj.P.Val < 0.15)
length(rownames(sign_age))
## [1] 1348

DE genes 10%

sign_age <- subset(res_name_age, adj.P.Val < 0.10)
length(rownames(sign_age))
## [1] 789

DE genes 5%

sign_age <- subset(res_name_age, adj.P.Val < 0.05)
length(rownames(sign_age))
## [1] 311

Plots

FDR distibution

res = res_name_age
p = ggplot(res, aes(P.Value))
p + geom_density(color="darkblue", fill="lightblue") +
theme_classic() +
ggtitle("FDR Distribution")

Fold change distribution

p = ggplot(res, aes(logFC))
p + geom_density(color = "darkblue", fill = "lightblue") +
theme_classic() +
ggtitle("Fold Change Distribution")

MA plot

plot.data = res
plot.data$id = rownames(plot.data)
data = data.frame(plot.data)
data$P.Value = -log10(data$P.Value)
data$fifteen = as.factor(abs(data$adj.P.Val < 0.05))
ma = ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c ("> 0.05", "< 0.05")) +
labs(title = "MA plot", color = "labels") +
theme_classic()

#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)

Volcano plot

vp = ggplot(data, aes(logFC, P.Value, color = fifteen))
vp + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
labs(title = "Gene Level Volcano Plot", color = "FDR") +
#theme(plot.title = element_text(hjust = 0.5)) +
theme_classic() +
xlim(-1,1) + ylim(0, 20) + ylab("-log10 pvalue")

Overlap with MiGA aging genes (input: 1693 genes)

list of genes

setwd("~/Documents/MiGA/Revision")
MIGA = "~/Documents/MiGA/Revision/AgeMIGA.xlsx"
MIGA = read_excel(MIGA, col_names = TRUE) 
MIGA = as.data.frame(MIGA)
overlap <- merge(sign_age, MIGA, by = "symbol")
list(overlap$symbol)
## [[1]]
##  [1] "A1BG"       "A1BG-AS1"   "AC022568.1" "AC129507.1" "AGAP2-AS1" 
##  [6] "AIM2"       "AL445524.1" "ANXA1"      "AP3M2"      "APBA2"     
## [11] "ATPAF1"     "BBOF1"      "BX640514.2" "C1orf162"   "C22orf34"  
## [16] "CBY1"       "CD163"      "CDYL2"      "CHCHD6"     "CLEC2B"    
## [21] "CLEC4D"     "CLIC2"      "CMAHP"      "CPNE8"      "CST7"      
## [26] "CSTA"       "DDO"        "DOCK5"      "EFEMP2"     "EMP2"      
## [31] "FSTL5"      "GGT1"       "GOLGA6L7"   "HOTAIRM1"   "IL15"      
## [36] "IL2RG"      "LINC00996"  "LINC02432"  "LSP1"       "LTA4H"     
## [41] "MAP1LC3A"   "MCTP1"      "MOV10L1"    "MS4A6A"     "MT2A"      
## [46] "NMRAL2P"    "NR3C1"      "NUDT16P1"   "NUPR1"      "NXF3"      
## [51] "PAQR5"      "PBX3"       "PFKP"       "PPP1R26P1"  "PSTPIP1"   
## [56] "PTPN7"      "S100A4"     "SELL"       "SH3BP5"     "SLC29A3"   
## [61] "SMIM14"     "SMIM3"      "SPATS2L"    "TSTD1"      "VNN2"      
## [66] "VSIG4"      "ZFPM2-AS1"  "ZNF24"      "ZNF287"     "ZNF300P1"  
## [71] "ZNF454"     "ZNF804A"

Significant enrichment?

set1 <- sign_age
set2 <- MIGA

set1_v <- set1$symbol
set2_v <- set2$symbol

setEnrichment <- function(set1, set2, universe = 20000){

  a = sum(set1 %in% set2)

  c = length(set1) - a

  b = length(set2) - a

  d = universe - length(set2) - c

  contingency_table = matrix(c(a, c, b, d), nrow = 2)

  # one-tailed test for enrichment only

  fisher_results = fisher.test(contingency_table, alternative = "greater")

  # returns data.frame containing the lengths of the two sets, the overlap, the enrichment ratio (odds ratio) and P value

  df <- tibble::tibble( set1_length = length(set1), set2_length = length(set2), overlap = a, ratio = fisher_results$estimate, p.value = fisher_results$p.value)

  return(df)
}

setEnrichment( set1 = set1_v, set2 = set2_v)

Boxplot top 6 genes age per stimulation

#note. pvalues based on wilcox test of log2(tpm)+1 without additional correction(not based on pvalue DEGs with DREAM)

metadata <- read.table("~/Documents/MiGASti/Databases/metadata.txt", quote="\"", comment.char="")
#set rownames to Sample
row.names(metadata) <- metadata$Sample 
#exclude samples that did not pass QC
exclude <- read.csv("~/Documents/MiGASti/Databases/samples2remove.txt", sep="")
exclude <- exclude$x
genes_tpm_filt = genes_tpm[, !colnames(genes_tpm) %in% exclude] 
#Excludes the samples from filters. 
#dim(genes_tpm_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)
## [1] 38
genes_tpm_filt = log2((genes_tpm_filt) + 1)
genes_tpm_filt <- as.data.frame(genes_tpm_filt)
genes_tpm_ordered <- genes_tpm_filt[,rownames(metadata_filt)]
#head(genes_tpm_ordered)
all(rownames(metadata_filt) == colnames (genes_tpm_ordered)) #TRUE
## [1] TRUE

Boxplot top 6 genes LPS

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "LPS"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)

deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])

gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4

Boxplot top 6 genes IFNy

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "IFNy"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)


deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])

gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")

Boxplot top 6 genes R848

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "R848"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)


deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])

gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")

Boxplot top 6 genes TNFa

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "TNFa"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)


deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])

gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")

Boxplot top 6 genes DEX

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "DEX"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)


deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])

gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")

Boxplot top 6 genes IL4

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "IL4"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)


deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])

gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")

Boxplot top 6 genes ATP

samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor 
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "ATP"] # Includes same donor  

metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]

genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)


deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)
gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])

gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")

names(metadata4deg) = tolower(names(metadata4deg))

gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")

ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
  geom_boxplot(notch = F, na.rm = T) +
  geom_jitter() +
  theme_bw() + facet_wrap(~symbol, scales = "free_y") +
  ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")

Data table for download

res_age_diff_top = res_name_age[, c("ensembl", "symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")]
createDT(res_age_diff_top)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html